/* * Sun Public License Notice * * The contents of this file are subject to the Sun Public License * Version 1.0 (the "License"). You may not use this file except in * compliance with the License. A copy of the License is available at * http://www.sun.com/ * * The Original Code is Forte for Java, Community Edition. The Initial * Developer of the Original Code is Sun Microsystems, Inc. Portions * Copyright 1997-2000 Sun Microsystems, Inc. All Rights Reserved. */ package org.netbeans.editor.ext; import org.netbeans.editor.Syntax; /** * Lexical analyzer for the plain text. * * @author Miloslav Metelka * @version 1.00 */ public class PlainSyntax extends Syntax { /** Token-id of the plain text */ public static final int TEXT = 0; /* Internal states used internally by analyzer. There * can be any number of them declared by the analyzer. * They are usually numbered starting from zero but they don't * have to. The only reserved value is -1 which is reserved * for the INIT state - the initial internal state of the analyzer. */ private static final int ISI_TEXT = 0; /** This is core function of analyzer and it returns one of following numbers: * a) token number of next token from scanned text * b) EOL when end of line was found in scanned buffer * c) EOT when there is no more chars available in scanned buffer. * * The function scans the active character and does one or more * of the following actions: * 1. change internal analyzer state (state = new-state) * 2. return token ID (return token-ID) * 3. adjust current position to signal different end of token; * the character that offset points to is not included in the token */ protected int parseToken() { // The main loop that reads characters one by one follows while (offset < stopOffset) { char ch = buffer[offset]; // get the current character switch (state) { // switch by the current internal state case INIT: switch (ch) { case '\n': offset++; return EOL; default: state = ISI_TEXT; break; } break; case ISI_TEXT: switch (ch) { case '\n': state = INIT; return TEXT; } break; } // end of switch(state) offset++; // move to the next char } /* At this state there's no more text in the scanned buffer. * The caller will decide either to stop scanning at all * or to relocate scanning and provide next buffer with characters. * The lastBuffer variable indicates whether the scanning will * stop (true) or the caller will provide another buffer * to continue on (false) and call relocate() to continue on the given buffer. * If this is the last buffer, the analyzer must ensure * that for all internal states there will be some token ID returned. * The easiest way how to ensure that all the internal states will * be covered is to copy all the internal state constants and * put them after the switch() and provide the code that will return * appropriate token ID. * * When there are no more characters available in the buffer * and the buffer is not the last one the analyzer can still * decide to return the token ID even if it doesn't know whether * the token is complete or not. This is possible in this simple * implementation for example because it doesn't matter whether * it returns the text all together or broken into several pieces. * The advantage of such aproach is that the preScan value * is minimized which avoids the additional increasing of the buffer * by preScan characters, but on the other hand it can become * problematic if the token should be forwarded for some further * processing. For example it could seem handy to return incomplete * token for java block comments but it could become difficult * if we would want to analyzer these comment tokens additionally * by the HTML analyzer for example. */ // Normally the following block would be done only for lastBuffer == true // but in this case it can always be done switch (state) { case ISI_TEXT: state = INIT; return TEXT; } // need to continue on another buffer return EOT; } } /* * Log * 1 Gandalf 1.0 12/28/99 Miloslav Metelka * $ */